Smillie2019 (Smillie2019_human_IBD)

Cell type annotation

In [1]:
# load required modules
import os

import numpy as np
import pandas as pd
import pkg_resources
import scanpy as sc

import besca as bc

# Notebook-wide scanpy settings.
# Verbosity levels: errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 3
# A low dpi (dots per inch) yields small inline figures.
sc.settings.set_figure_params(dpi=80)
# Print the versions of scanpy and its main dependencies for reproducibility.
sc.logging.print_versions()
./.conda/envs/besca_test/lib/python3.6/site-packages/scanpy/api/__init__.py:6: FutureWarning: 

In a future version of Scanpy, `scanpy.api` will be removed.
Simply use `import scanpy as sc` and `import scanpy.external as sce` instead.

  FutureWarning,
scanpy==1.4.5.post2 anndata==0.7.1 umap==0.3.10 numpy==1.17.5 scipy==1.4.1 pandas==0.24.1 scikit-learn==0.22.1 statsmodels==0.11.0 python-igraph==0.8.2 louvain==0.6.1
In [2]:
# Project root: the folder that contains the "raw" and "analyzed" sub-folders.
root_path = os.getcwd()

# Input: the AnnData file written by the standard workflow run named below.
analysis_name = 'standard_workflow_besca2_0'
results_folder = os.path.join(root_path, 'analyzed', analysis_name)
input_data = os.path.join(results_folder, '{}.h5ad'.format(analysis_name))

# Output locations; everything is kept inside the results folder.
outdir_data = results_folder
outdir_figures, outdir_results = (
    os.path.join(results_folder, subfolder) for subfolder in ('figures', 'results')
)
sc.settings.figdir = outdir_figures  # directory scanpy uses when saving figures

# Create the output folders when they do not exist yet (data, figures, results).
for _outdir in (outdir_data, outdir_figures, outdir_results):
    os.makedirs(_outdir, exist_ok=True)
In [3]:
# Reload the AnnData object that the standard workflow wrote to `input_data`.
adata = sc.read(input_data)
In [4]:
# Show the AnnData summary: dimensions plus the obs/var/uns/obsm/varm keys.
adata
Out[4]:
AnnData object with n_obs × n_vars = 149732 × 2196 
    obs: 'CELL', 'Cluster', 'Health', 'Location', 'Subject', 'celltype_highlevel', 'nGene', 'nUMI', 'original_name', 'percent_mito', 'n_counts', 'n_genes', 'batch', 'leiden'
    var: 'ENSEMBL', 'SYMBOL', 'n_cells', 'total_counts', 'frac_reads'
    uns: 'leiden', 'leiden_colors', 'neighbors', 'pca', 'rank_genes_groups', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
In [5]:
# Peek at the first rows of the per-cell annotation table.
adata.obs.head()
Out[5]:
CELL Cluster Health Location Subject celltype_highlevel nGene nUMI original_name percent_mito n_counts n_genes batch leiden
N7.EpiA.AAGCAAGAGTCAAC-Epi N7.EpiA.AAGCAAGAGTCAAC-Epi Cycling TA Non-inflamed Epi N7 Epi 1507 7428 N7.EpiA.AAGCAAGAGTCAAC 0.057351 7428.0 1507 N7 8
N7.EpiA.ACGAGGGAGCTGAT-Epi N7.EpiA.ACGAGGGAGCTGAT-Epi Enterocyte Progenitors Non-inflamed Epi N7 Epi 828 2877 N7.EpiA.ACGAGGGAGCTGAT 0.009037 2877.0 828 N7 0
N7.EpiA.ACGTTTACTGGTAC-Epi N7.EpiA.ACGTTTACTGGTAC-Epi Immature Enterocytes 2 Non-inflamed Epi N7 Epi 2318 15332 N7.EpiA.ACGTTTACTGGTAC 0.133707 15332.0 2318 N7 7
N7.EpiA.AGAGAATGGTCATG-Epi N7.EpiA.AGAGAATGGTCATG-Epi Enterocyte Progenitors Non-inflamed Epi N7 Epi 884 3498 N7.EpiA.AGAGAATGGTCATG 0.002001 3498.0 884 N7 7
N7.EpiA.AGAGCGGAGTATGC-Epi N7.EpiA.AGAGCGGAGTATGC-Epi TA 1 Non-inflamed Epi N7 Epi 858 3261 N7.EpiA.AGAGCGGAGTATGC 0.003067 3261.0 858 N7 0
In [ ]:
 
In [6]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline  
# Overrides the dpi=80 chosen in the setup cell: figures from here on use dpi=90.
sc.settings.set_figure_params(dpi=90)
In [7]:
# UMAP coloured by leiden cluster and by the 'Location' annotation.
sc.pl.umap(
    adata,
    color=['leiden', 'Location'],
    legend_loc='on data',
    legend_fontsize=9,
)
In [8]:
# UMAP coloured by leiden cluster and by the 'celltype_highlevel' annotation.
sc.pl.umap(
    adata,
    color=['leiden', 'celltype_highlevel'],
    legend_loc='on data',
    legend_fontsize=9,
)
In [9]:
# UMAP coloured by leiden cluster and by the 'Cluster' annotation;
# a smaller legend font is used here than in the previous plots.
sc.pl.umap(
    adata,
    color=['leiden', 'Cluster'],
    legend_loc='on data',
    legend_fontsize=6,
)
In [10]:
# UMAP coloured by the two entries 'CD4' and 'CD8A'
# (NOTE(review): presumably gene symbols resolved via var_names/raw - confirm).
sc.pl.umap(
    adata,
    color=['CD4', 'CD8A'],
)
In [11]:
# 'Cluster' annotation on its own, with the default legend placement
# (no legend_loc is passed here, unlike the earlier plots).
sc.pl.umap(
    adata,
    color=['Cluster'],
    legend_fontsize=9,
)
In [12]:
# Locate the GMT signature file inside the installed besca package.
# (pkg_resources is imported together with the other modules at the top of the notebook.)
gmt_file_IMM = pkg_resources.resource_filename('besca', 'datasets/genesets/HumanCD45p_scseqCMs6.gmt')
if not os.path.isfile(gmt_file_IMM):
    # Fail early with a clear message, before the AnnData object is copied and scored.
    raise FileNotFoundError('GMT signature file not found: ' + gmt_file_IMM)

# Score on a copy so that the many score_* columns end up in
# `adata_with_scores.obs` and `adata` itself stays unchanged.
adata_with_scores = adata.copy()
bc.tl.sig.combined_signature_score(adata_with_scores, gmt_file_IMM)
computing score 'score_HumanCD45p_scseqCMs6_ActB_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_ActB_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Activation_scanpyUP'
WARNING: genes are not in var_names and ignored: ['FCGR3']
    finished: added
    'score_HumanCD45p_scseqCMs6_Activation_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Basophil_scanpyUP'
WARNING: genes are not in var_names and ignored: ['CCR3']
    finished: added
    'score_HumanCD45p_scseqCMs6_Basophil_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Bcells_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Bcells_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_CCG1S_scanpyUP'
WARNING: genes are not in var_names and ignored: ['DSSC1']
    finished: added
    'score_HumanCD45p_scseqCMs6_CCG1S_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_CCG2M_scanpyUP'
WARNING: genes are not in var_names and ignored: ['CSK2']
    finished: added
    'score_HumanCD45p_scseqCMs6_CCG2M_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_Cafs_scanpyUP'
WARNING: genes are not in var_names and ignored: ['FDF7']
    finished: added
    'score_HumanCD45p_scseqCMs6_Cafs_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_Cellcycle_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Cellcycle_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Checkpoint_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Checkpoint_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Cyto_scanpyUP'
WARNING: genes are not in var_names and ignored: ['TNFA', 'IL4', 'IL7A', 'IL12', 'IL23', 'GM-CSF', 'GCSFCCL1', 'CCL12', 'CCL27', 'SDF1A', 'BCA1', 'MIP1B']
    finished: added
    'score_HumanCD45p_scseqCMs6_Cyto_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_Cytotox_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Cytotox_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_DCR_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_DCR_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_DCrec_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_DCrec_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_DCs_scanpyUP'
WARNING: genes are not in var_names and ignored: ['LY6C1', 'SIGLECH']
    finished: added
    'score_HumanCD45p_scseqCMs6_DCs_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_Eff_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Eff_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Endo_scanpyUP'
WARNING: genes are not in var_names and ignored: ['ITCAM1', 'PECAM1']
    finished: added
    'score_HumanCD45p_scseqCMs6_Endo_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_Endot_scanpyUP'
WARNING: genes are not in var_names and ignored: ['PECAM1']
    finished: added
    'score_HumanCD45p_scseqCMs6_Endot_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_Endothelial_scanpyUP'
WARNING: genes are not in var_names and ignored: ['PECAM1']
    finished: added
    'score_HumanCD45p_scseqCMs6_Endothelial_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Eosinophil_scanpyUP'
WARNING: genes are not in var_names and ignored: ['CCR3', 'SLIGLEC10']
    finished: added
    'score_HumanCD45p_scseqCMs6_Eosinophil_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_Epith_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Epith_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_ExhB_scanpyUP'
WARNING: genes are not in var_names and ignored: ['TILPL2']
    finished: added
    'score_HumanCD45p_scseqCMs6_ExhB_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Granulo_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Granulo_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_HLA_scanpyUP'
WARNING: genes are not in var_names and ignored: ['HLA-H', 'HLA-L', 'HLA-DRB2']
    finished: added
    'score_HumanCD45p_scseqCMs6_HLA_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_HLAP_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_HLAP_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_HLAS_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_HLAS_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Ifi_scanpyUP'
WARNING: genes are not in var_names and ignored: ['OAS1G']
    finished: added
    'score_HumanCD45p_scseqCMs6_Ifi_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Ifng_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Ifng_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Macrophage_scanpyUP'
WARNING: genes are not in var_names and ignored: ['ADGRE1']
    finished: added
    'score_HumanCD45p_scseqCMs6_Macrophage_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_Mast_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Mast_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Megakaryocytes_scanpyUP'
WARNING: genes are not in var_names and ignored: ['PECAM1']
    finished: added
    'score_HumanCD45p_scseqCMs6_Megakaryocytes_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_MelMelan_scanpyUP'
WARNING: genes are not in var_names and ignored: ['TYR', 'SLC24A5', 'MAGEA6', 'PRAME', 'PAX3', 'MLANA']
    finished: added
    'score_HumanCD45p_scseqCMs6_MelMelan_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_MelMesen_scanpyUP'
WARNING: genes are not in var_names and ignored: ['CYR6']
    finished: added
    'score_HumanCD45p_scseqCMs6_MelMesen_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_MemB_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_MemB_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_Memory_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Memory_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Mo14_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Mo14_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_Mo16_scanpyUP'
WARNING: genes are not in var_names and ignored: ['FCGR3']
    finished: added
    'score_HumanCD45p_scseqCMs6_Mo16_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_MoMa_scanpyUP'
WARNING: genes are not in var_names and ignored: ['FCGR3']
    finished: added
    'score_HumanCD45p_scseqCMs6_MoMa_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_Monocytes_scanpyUP'
WARNING: genes are not in var_names and ignored: ['FCGR3', 'FCGR1']
    finished: added
    'score_HumanCD45p_scseqCMs6_Monocytes_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Myelo_scanpyUP'
WARNING: genes are not in var_names and ignored: ['FCGR4', 'FCGR1']
    finished: added
    'score_HumanCD45p_scseqCMs6_Myelo_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_MyeloSubtype_scanpyUP'
WARNING: genes are not in var_names and ignored: ['LY6G', 'CD177']
    finished: added
    'score_HumanCD45p_scseqCMs6_MyeloSubtype_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_NKT_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_NKT_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_NKcells_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_NKcells_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_NKcyt_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_NKcyt_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_NKnai_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_NKnai_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Naive_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Naive_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_NaiveB_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_NaiveB_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Neutrophil_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Neutrophil_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_NonEff_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_NonEff_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_OMyelo_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_OMyelo_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Others_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Others_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Plasma_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Plasma_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Pyro_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Pyro_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Stemmess_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Stemmess_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_StemmessS_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_StemmessS_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Stromal_scanpyUP'
WARNING: genes are not in var_names and ignored: ['PECAM1']
    finished: added
    'score_HumanCD45p_scseqCMs6_Stromal_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_T4CM_scanpyUP'
WARNING: genes are not in var_names and ignored: ['TRADO']
    finished: added
    'score_HumanCD45p_scseqCMs6_T4CM_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_TAM_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TAM_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_TAMCx_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TAMCx_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_TEM_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TEM_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_TMO_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TMO_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_TMid_scanpyUP'
WARNING: genes are not in var_names and ignored: ['ANGTPL4']
    finished: added
    'score_HumanCD45p_scseqCMs6_TMid_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_TNK_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TNK_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_TStem_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TStem_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_TStemhi_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TStemhi_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_TSteml_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TSteml_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_TStemlo_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TStemlo_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_TTh1_scanpyUP'
WARNING: genes are not in var_names and ignored: ['SPP4', 'IFNA1']
    finished: added
    'score_HumanCD45p_scseqCMs6_TTh1_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_TTh17_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TTh17_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_TTh2_scanpyUP'
WARNING: genes are not in var_names and ignored: ['CCR3', 'CCR8', 'CSCR4', 'IL4']
    finished: added
    'score_HumanCD45p_scseqCMs6_TTh2_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_Tcd4_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Tcd4_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Tcd8_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Tcd8_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Tcells_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Tcells_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Tcgd_scanpyUP'
WARNING: genes are not in var_names and ignored: ['TRDV2', 'TRDV1']
    finished: added
    'score_HumanCD45p_scseqCMs6_Tcgd_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Tcytox_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Tcytox_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_Teff_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Teff_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Tfh_scanpyUP'
WARNING: genes are not in var_names and ignored: ['FLAMF1']
    finished: added
    'score_HumanCD45p_scseqCMs6_Tfh_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_TilCM_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_TilCM_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_Tpexh_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Tpexh_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_Treg_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Treg_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_Ttexh_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Ttexh_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_Ubi_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_Ubi_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_UnivExh_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_UnivExh_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_UnivMem_scanpyUP'
WARNING: genes are not in var_names and ignored: ['CCXR3']
    finished: added
    'score_HumanCD45p_scseqCMs6_UnivMem_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_UnivNaive_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_UnivNaive_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_aDCs_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_aDCs_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_allSteml_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_allSteml_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_cDC1_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_cDC1_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_cDC2_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_cDC2_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_cDCs_scanpyUP'
WARNING: genes are not in var_names and ignored: ['PLET1']
    finished: added
    'score_HumanCD45p_scseqCMs6_cDCs_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_epDCs_scanpyUP'
WARNING: genes are not in var_names and ignored: ['SIGLECG', 'PLET1']
    finished: added
    'score_HumanCD45p_scseqCMs6_epDCs_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_general_scanpyUP'
WARNING: genes are not in var_names and ignored: ['ARG1']
    finished: added
    'score_HumanCD45p_scseqCMs6_general_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_moDC_scanpyUP'
    finished: added
    'score_HumanCD45p_scseqCMs6_moDC_scanpyUP', score of gene set (adata.obs) (0:00:02)
computing score 'score_HumanCD45p_scseqCMs6_pDCs_scanpyUP'
WARNING: genes are not in var_names and ignored: ['SIGLECH']
    finished: added
    'score_HumanCD45p_scseqCMs6_pDCs_scanpyUP', score of gene set (adata.obs) (0:00:03)
computing score 'score_HumanCD45p_scseqCMs6_uDCs_scanpyUP'
WARNING: genes are not in var_names and ignored: ['SIGLECG']
    finished: added
    'score_HumanCD45p_scseqCMs6_uDCs_scanpyUP', score of gene set (adata.obs) (0:00:02)
In [13]:
# Show the scored copy; its obs now also lists the score_* columns.
adata_with_scores
Out[13]:
AnnData object with n_obs × n_vars = 149732 × 2196 
    obs: 'CELL', 'Cluster', 'Health', 'Location', 'Subject', 'celltype_highlevel', 'nGene', 'nUMI', 'original_name', 'percent_mito', 'n_counts', 'n_genes', 'batch', 'leiden', 'score_HumanCD45p_scseqCMs6_ActB_scanpy', 'score_HumanCD45p_scseqCMs6_Activation_scanpy', 'score_HumanCD45p_scseqCMs6_Basophil_scanpy', 'score_HumanCD45p_scseqCMs6_Bcells_scanpy', 'score_HumanCD45p_scseqCMs6_CCG1S_scanpy', 'score_HumanCD45p_scseqCMs6_CCG2M_scanpy', 'score_HumanCD45p_scseqCMs6_Cafs_scanpy', 'score_HumanCD45p_scseqCMs6_Cellcycle_scanpy', 'score_HumanCD45p_scseqCMs6_Checkpoint_scanpy', 'score_HumanCD45p_scseqCMs6_Cyto_scanpy', 'score_HumanCD45p_scseqCMs6_Cytotox_scanpy', 'score_HumanCD45p_scseqCMs6_DCR_scanpy', 'score_HumanCD45p_scseqCMs6_DCrec_scanpy', 'score_HumanCD45p_scseqCMs6_DCs_scanpy', 'score_HumanCD45p_scseqCMs6_Eff_scanpy', 'score_HumanCD45p_scseqCMs6_Endo_scanpy', 'score_HumanCD45p_scseqCMs6_Endot_scanpy', 'score_HumanCD45p_scseqCMs6_Endothelial_scanpy', 'score_HumanCD45p_scseqCMs6_Eosinophil_scanpy', 'score_HumanCD45p_scseqCMs6_Epith_scanpy', 'score_HumanCD45p_scseqCMs6_ExhB_scanpy', 'score_HumanCD45p_scseqCMs6_Granulo_scanpy', 'score_HumanCD45p_scseqCMs6_HLA_scanpy', 'score_HumanCD45p_scseqCMs6_HLAP_scanpy', 'score_HumanCD45p_scseqCMs6_HLAS_scanpy', 'score_HumanCD45p_scseqCMs6_Ifi_scanpy', 'score_HumanCD45p_scseqCMs6_Ifng_scanpy', 'score_HumanCD45p_scseqCMs6_Macrophage_scanpy', 'score_HumanCD45p_scseqCMs6_Mast_scanpy', 'score_HumanCD45p_scseqCMs6_Megakaryocytes_scanpy', 'score_HumanCD45p_scseqCMs6_MelMelan_scanpy', 'score_HumanCD45p_scseqCMs6_MelMesen_scanpy', 'score_HumanCD45p_scseqCMs6_MemB_scanpy', 'score_HumanCD45p_scseqCMs6_Memory_scanpy', 'score_HumanCD45p_scseqCMs6_Mo14_scanpy', 'score_HumanCD45p_scseqCMs6_Mo16_scanpy', 'score_HumanCD45p_scseqCMs6_MoMa_scanpy', 'score_HumanCD45p_scseqCMs6_Monocytes_scanpy', 'score_HumanCD45p_scseqCMs6_Myelo_scanpy', 'score_HumanCD45p_scseqCMs6_MyeloSubtype_scanpy', 'score_HumanCD45p_scseqCMs6_NKT_scanpy', 
'score_HumanCD45p_scseqCMs6_NKcells_scanpy', 'score_HumanCD45p_scseqCMs6_NKcyt_scanpy', 'score_HumanCD45p_scseqCMs6_NKnai_scanpy', 'score_HumanCD45p_scseqCMs6_Naive_scanpy', 'score_HumanCD45p_scseqCMs6_NaiveB_scanpy', 'score_HumanCD45p_scseqCMs6_Neutrophil_scanpy', 'score_HumanCD45p_scseqCMs6_NonEff_scanpy', 'score_HumanCD45p_scseqCMs6_OMyelo_scanpy', 'score_HumanCD45p_scseqCMs6_Others_scanpy', 'score_HumanCD45p_scseqCMs6_Plasma_scanpy', 'score_HumanCD45p_scseqCMs6_Pyro_scanpy', 'score_HumanCD45p_scseqCMs6_Stemmess_scanpy', 'score_HumanCD45p_scseqCMs6_StemmessS_scanpy', 'score_HumanCD45p_scseqCMs6_Stromal_scanpy', 'score_HumanCD45p_scseqCMs6_T4CM_scanpy', 'score_HumanCD45p_scseqCMs6_TAM_scanpy', 'score_HumanCD45p_scseqCMs6_TAMCx_scanpy', 'score_HumanCD45p_scseqCMs6_TEM_scanpy', 'score_HumanCD45p_scseqCMs6_TMO_scanpy', 'score_HumanCD45p_scseqCMs6_TMid_scanpy', 'score_HumanCD45p_scseqCMs6_TNK_scanpy', 'score_HumanCD45p_scseqCMs6_TStem_scanpy', 'score_HumanCD45p_scseqCMs6_TStemhi_scanpy', 'score_HumanCD45p_scseqCMs6_TSteml_scanpy', 'score_HumanCD45p_scseqCMs6_TStemlo_scanpy', 'score_HumanCD45p_scseqCMs6_TTh1_scanpy', 'score_HumanCD45p_scseqCMs6_TTh17_scanpy', 'score_HumanCD45p_scseqCMs6_TTh2_scanpy', 'score_HumanCD45p_scseqCMs6_Tcd4_scanpy', 'score_HumanCD45p_scseqCMs6_Tcd8_scanpy', 'score_HumanCD45p_scseqCMs6_Tcells_scanpy', 'score_HumanCD45p_scseqCMs6_Tcgd_scanpy', 'score_HumanCD45p_scseqCMs6_Tcytox_scanpy', 'score_HumanCD45p_scseqCMs6_Teff_scanpy', 'score_HumanCD45p_scseqCMs6_Tfh_scanpy', 'score_HumanCD45p_scseqCMs6_TilCM_scanpy', 'score_HumanCD45p_scseqCMs6_Tpexh_scanpy', 'score_HumanCD45p_scseqCMs6_Treg_scanpy', 'score_HumanCD45p_scseqCMs6_Ttexh_scanpy', 'score_HumanCD45p_scseqCMs6_Ubi_scanpy', 'score_HumanCD45p_scseqCMs6_UnivExh_scanpy', 'score_HumanCD45p_scseqCMs6_UnivMem_scanpy', 'score_HumanCD45p_scseqCMs6_UnivNaive_scanpy', 'score_HumanCD45p_scseqCMs6_aDCs_scanpy', 'score_HumanCD45p_scseqCMs6_allSteml_scanpy', 'score_HumanCD45p_scseqCMs6_cDC1_scanpy', 
'score_HumanCD45p_scseqCMs6_cDC2_scanpy', 'score_HumanCD45p_scseqCMs6_cDCs_scanpy', 'score_HumanCD45p_scseqCMs6_epDCs_scanpy', 'score_HumanCD45p_scseqCMs6_general_scanpy', 'score_HumanCD45p_scseqCMs6_moDC_scanpy', 'score_HumanCD45p_scseqCMs6_pDCs_scanpy', 'score_HumanCD45p_scseqCMs6_uDCs_scanpy'
    var: 'ENSEMBL', 'SYMBOL', 'n_cells', 'total_counts', 'frac_reads'
    uns: 'leiden', 'leiden_colors', 'neighbors', 'pca', 'rank_genes_groups', 'umap', 'Location_colors', 'celltype_highlevel_colors', 'Cluster_colors'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
In [14]:
# Plot three of the signature scores stored in adata_with_scores.obs.
# All score columns share the same prefix and suffix, so only the signature name varies.
score_columns = [
    'score_HumanCD45p_scseqCMs6_{}_scanpy'.format(signature)
    for signature in ('Tcells', 'Cellcycle', 'Endothelial')
]
sc.pl.umap(adata_with_scores, color=score_columns, legend_loc='on data', legend_fontsize=6)
In [15]:
# Signature definitions shipped with besca, and the signature set used for annotation.
mymarkers = bc.datasets.load_immune_signatures()
setName = 'HumanCD45p_scseqCMs6'

# fract_pos.gct was exported by BESCA in the standard workflow (leiden labeling).
# Built with os.path.join, like every other path in this notebook.
fract_pos_file = os.path.join(results_folder, 'labelings', 'leiden', 'fract_pos.gct')
f = pd.read_csv(fract_pos_file, sep="\t", skiprows=2)  # the first two lines of the file are skipped
df = bc.tl.sig.score_mw(f, mymarkers)

# Cut-off: median of the '<setName>_Ubi' row of the score table, scaled by 2/3
# (NOTE(review): presumably one value per leiden cluster - confirm).
myc = np.median(df.loc[setName+'_Ubi',:]*2/3)

# NOTE(review): the meaning of the last argument (0.3) is not visible here -
# confirm against the make_anno documentation.
cNames = bc.tl.sig.make_anno(df, myc, setName, f, 0.3)
# Attach the derived annotation to the cells via their 'leiden' cluster.
adata = bc.tl.sig.add_anno(adata, cNames, 'leiden')
In [16]:
# Two figures, each showing a pair of annotation columns next to the leiden clusters.
for annotation_pair in (['cell_names', 'sscell_group'], ['cell_group', 'scell_group']):
    sc.pl.umap(adata, color=annotation_pair + ['leiden'], legend_loc='on data', legend_fontsize=6)
... storing 'cell_names' as categorical
... storing 'cell_group' as categorical
... storing 'scell_group' as categorical
... storing 'sscell_group' as categorical
In [ ]:
 
In [ ]:
 
In [17]:
# List the distinct 'Cluster' labels in string-sorted order; the renaming cell
# that follows is written against exactly this order.
sorted(adata.obs.get('Cluster').unique(), key=str)
Out[17]:
['Best4+ Enterocytes',
 'CD4+ Activated Fos-hi',
 'CD4+ Activated Fos-lo',
 'CD4+ Memory',
 'CD4+ PD1+',
 'CD69+ Mast',
 'CD69- Mast',
 'CD8+ IELs',
 'CD8+ IL17+',
 'CD8+ LP',
 'Cycling B',
 'Cycling Monocytes',
 'Cycling T',
 'Cycling TA',
 'DC1',
 'DC2',
 'Endothelial',
 'Enterocyte Progenitors',
 'Enterocytes',
 'Enteroendocrine',
 'Follicular',
 'GC',
 'Glia',
 'Goblet',
 'ILCs',
 'Immature Enterocytes 1',
 'Immature Enterocytes 2',
 'Immature Goblet',
 'Inflammatory Fibroblasts',
 'Inflammatory Monocytes',
 'M cells',
 'Macrophages',
 'Microvascular',
 'Myofibroblasts',
 'NKs',
 'Pericytes',
 'Plasma',
 'Post-capillary Venules',
 'RSPO3+',
 'Secretory TA',
 'Stem',
 'TA 1',
 'TA 2',
 'Tregs',
 'Tuft',
 'WNT2B+ Fos-hi',
 'WNT2B+ Fos-lo 1',
 'WNT2B+ Fos-lo 2',
 'WNT5B+ 1',
 'WNT5B+ 2']
In [18]:
# Curated name for every original 'Cluster' label.
# An explicit mapping keeps each pairing visible and no longer depends on a
# hand-maintained list matching the sorted cluster order position by position.
cluster_to_dblabel = {
    'Best4+ Enterocytes': 'enterocyte',
    'CD4+ Activated Fos-hi': 'activated CD4-positive, alpha-beta T cell',
    'CD4+ Activated Fos-lo': 'activated CD4-positive, alpha-beta T cell',
    'CD4+ Memory': 'CD4-positive, alpha-beta memory T cell',
    'CD4+ PD1+': 'exhausted-like CD4-positive, alpha-beta T cell',
    'CD69+ Mast': 'mast cell',
    'CD69- Mast': 'mast cell',
    'CD8+ IELs': 'CD8-positive, alpha-beta T cell',
    'CD8+ IL17+': 'CD8-positive, alpha-beta cytokine secreting effector T cell',
    'CD8+ LP': 'CD8-positive, alpha-beta T cell',
    'Cycling B': 'proliferating B cell',
    'Cycling Monocytes': 'proliferating monocyte',
    'Cycling T': 'proliferating T cell',
    'Cycling TA': 'proliferating transit amplifying cell',
    'DC1': 'CD141-positive myeloid dendritic cell',
    'DC2': 'CD1c-positive myeloid dendritic cell',
    'Endothelial': 'endothelial cell',
    'Enterocyte Progenitors': 'enterocyte progenitor',
    'Enterocytes': 'enterocyte',
    'Enteroendocrine': 'enteroendocrine cell',
    'Follicular': 'follicular B cell',
    'GC': 'germinal center B cell',
    'Glia': 'glial cell',
    'Goblet': 'goblet cell',
    'ILCs': 'innate lymphoid cell',
    'Immature Enterocytes 1': 'immature enterocyte',
    'Immature Enterocytes 2': 'immature enterocyte',
    'Immature Goblet': 'immature goblet cell',
    'Inflammatory Fibroblasts': 'inflammatory fibroblast',
    'Inflammatory Monocytes': 'inflammatory monocyte',
    'M cells': 'microfold cell',
    'Macrophages': 'macrophage',
    'Microvascular': 'microvascular endothelial cell',
    'Myofibroblasts': 'myofibroblast cell',
    'NKs': 'natural killer cell',
    'Pericytes': 'pericyte cell',
    'Plasma': 'plasma cell',
    'Post-capillary Venules': 'HEV endothelial cell',
    'RSPO3+': 'fibroblast',
    'Secretory TA': 'transit amplifying cell',
    'Stem': 'stem cell',
    'TA 1': 'transit amplifying cell',
    'TA 2': 'transit amplifying cell',
    'Tregs': 'regulatory T cell',
    'Tuft': 'brush cell',
    'WNT2B+ Fos-hi': 'fibroblast',
    'WNT2B+ Fos-lo 1': 'fibroblast',
    'WNT2B+ Fos-lo 2': 'fibroblast',
    'WNT5B+ 1': 'fibroblast',
    'WNT5B+ 2': 'fibroblast',
}

# Same ordering as the listing of the 'Cluster' labels shown in the previous cell.
cluster_order = sorted(adata.obs.get('Cluster').unique(), key=str)

# Stop with a clear message if the data contains a cluster without a curated name,
# instead of silently shifting every following label by one position.
unmapped_clusters = [name for name in cluster_order if name not in cluster_to_dblabel]
if unmapped_clusters:
    raise KeyError('No dblabel defined for cluster(s): {}'.format(unmapped_clusters))

# Positional list of new names, one per cluster, in the sorted cluster order.
new_cluster_names = [cluster_to_dblabel[name] for name in cluster_order]

bc.tl.annotate_cells_clustering(adata=adata, clustering_label='Cluster', new_annotation_label='dblabel', new_cluster_labels=new_cluster_names)
In [19]:
# Manual cell type per leiden cluster, keyed by cluster id.
# The list handed to besca is positional: entry i is the name for the i-th cluster.
leiden_to_celltype = {
    0: 'epithelial cell',
    1: 'plasma cell',
    2: 'plasma cell',
    3: 'T cell or ILC',
    4: 'plasma cell',
    5: 'fibroblast',
    6: 'goblet cell',
    7: 'enterocyte',
    8: 'epithelial cell',
    9: 'plasma cell',
    10: 'B cell',
    11: 'myeloid leukocyte',
    12: 'macrophage',
    13: 'endothelial cell',
    14: 'fibroblast',
    15: 'regulatory T cell',
    16: 'epithelial cell',
    17: 'myofibroblast cell',
    18: 'enterocyte',
    19: 'B cell',
    20: 'mast cell',
    21: 'neural cell',
    22: 'pericyte cell',
}
new_cluster_names = [leiden_to_celltype[cluster_id] for cluster_id in sorted(leiden_to_celltype)]

bc.tl.annotate_cells_clustering(
    adata=adata,
    clustering_label='leiden',
    new_annotation_label='celltype',
    new_cluster_labels=new_cluster_names,
)
In [ ]:
 
In [20]:
# Cell type label for every leiden cluster, each prefixed with its position in the
# list ("<position>: <cell type>").
# Important: the order of the labels needs to be equivalent to the cluster order above.
plain_cluster_names = [
    'epithelial cell',
    'plasma cell',
    'plasma cell',
    'T cell or ILC',
    'plasma cell',
    'fibroblast',
    'goblet cell',
    'enterocyte',
    'epithelial cell',
    'plasma cell',
    'B cell',
    'myeloid leukocyte',
    'macrophage',
    'endothelial cell',
    'fibroblast',
    'regulatory T cell',
    'epithelial cell',
    'myofibroblast cell',
    'enterocyte',
    'B cell',
    'mast cell',
    'neural cell',
    'pericyte cell',
]

# Build '0: epithelial cell', '1: plasma cell', ... instead of typing the prefixes by hand.
new_cluster_names = [
    f'{position}: {name}' for position, name in enumerate(plain_cluster_names)
]

# Annotate the 'leiden' clustering with the prefixed labels under 'cluster_celltype'.
bc.tl.annotate_cells_clustering(
    adata=adata,
    new_cluster_labels=new_cluster_names,
    clustering_label='leiden',
    new_annotation_label='cluster_celltype',
)
In [ ]:
 
In [21]:
# Export the curated 'dblabel' annotation as an additional labeling.
# Per the cell output this ranks genes per label (wilcoxon) and writes cell2labels.tsv,
# average.gct, fract_pos.gct, labelinfo.tsv and the WilxRank*.gct files to labelings/dblabel.
adata = bc.st.additional_labeling(
    adata,
    'dblabel',                       # label that is exported
    'dblabel',                       # name of the labeling folder
    'Curated celltype annotation.',  # presumably the labeling description
    'Klas Hatje',                    # presumably the labeling author
    outdir_data,
)
ranking genes
... storing 'dblabel' as categorical
... storing 'celltype' as categorical
... storing 'cluster_celltype' as categorical
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:06:38)
rank genes per label calculated using method wilcoxon.
mapping of cells to  dblabel exported successfully to cell2labels.tsv
average.gct exported successfully to file
fract_pos.gct exported successfully to file
labelinfo.tsv successfully written out
./analyzed/standard_workflow_besca2_0/labelings/dblabel/WilxRank.gct written out
./analyzed/standard_workflow_besca2_0/labelings/dblabel/WilxRank.pvalues.gct written out
./analyzed/standard_workflow_besca2_0/labelings/dblabel/WilxRank.logFC.gct written out
In [22]:
# Export the manual 'celltype' annotation as an additional labeling.
# Per the cell output this ranks genes per label (wilcoxon) and writes cell2labels.tsv,
# average.gct, fract_pos.gct, labelinfo.tsv and the WilxRank*.gct files to labelings/celltype.
adata = bc.st.additional_labeling(
    adata,
    'celltype',                     # label that is exported
    'celltype',                     # name of the labeling folder
    'Manual celltype annotation.',  # presumably the labeling description
    'Klas Hatje',                   # presumably the labeling author
    outdir_data,
)
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:05:39)
rank genes per label calculated using method wilcoxon.
mapping of cells to  celltype exported successfully to cell2labels.tsv
average.gct exported successfully to file
fract_pos.gct exported successfully to file
labelinfo.tsv successfully written out
./analyzed/standard_workflow_besca2_0/labelings/celltype/WilxRank.gct written out
./analyzed/standard_workflow_besca2_0/labelings/celltype/WilxRank.pvalues.gct written out
./analyzed/standard_workflow_besca2_0/labelings/celltype/WilxRank.logFC.gct written out
In [23]:
# Export the authors' coarse 'celltype_highlevel' annotation as an additional labeling.
# Per the cell output the files are written to labelings/Smillie2019_celltype_highlevel.
adata = bc.st.additional_labeling(
    adata,
    'celltype_highlevel',              # label that is exported
    'Smillie2019_celltype_highlevel',  # name of the labeling folder
    'Coarse cell type annotation from authors [Smillie2019].',  # presumably the description
    'Smillie et al',                   # presumably the labeling author
    outdir_data,
)
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:05:10)
rank genes per label calculated using method wilcoxon.
mapping of cells to  celltype_highlevel exported successfully to cell2labels.tsv
average.gct exported successfully to file
fract_pos.gct exported successfully to file
labelinfo.tsv successfully written out
./analyzed/standard_workflow_besca2_0/labelings/Smillie2019_celltype_highlevel/WilxRank.gct written out
./analyzed/standard_workflow_besca2_0/labelings/Smillie2019_celltype_highlevel/WilxRank.pvalues.gct written out
./analyzed/standard_workflow_besca2_0/labelings/Smillie2019_celltype_highlevel/WilxRank.logFC.gct written out
In [24]:
# Export the authors' 'Cluster' annotation as an additional labeling.
# Per the cell output the files are written to labelings/Smillie2019_Cluster.
adata = bc.st.additional_labeling(
    adata,
    'Cluster',              # label that is exported
    'Smillie2019_Cluster',  # name of the labeling folder
    'Cell type cluster annotation from authors [Smillie2019].',  # presumably the description
    'Smillie et al',        # presumably the labeling author
    outdir_data,
)
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:07:08)
rank genes per label calculated using method wilcoxon.
mapping of cells to  Cluster exported successfully to cell2labels.tsv
average.gct exported successfully to file
fract_pos.gct exported successfully to file
labelinfo.tsv successfully written out
./analyzed/standard_workflow_besca2_0/labelings/Smillie2019_Cluster/WilxRank.gct written out
./analyzed/standard_workflow_besca2_0/labelings/Smillie2019_Cluster/WilxRank.pvalues.gct written out
./analyzed/standard_workflow_besca2_0/labelings/Smillie2019_Cluster/WilxRank.logFC.gct written out
In [ ]:
 
In [25]:
# Save the AnnData object, including the annotations added above, into the analysis output folder.
processed_h5ad = os.path.join(outdir_data, 'Smillie2019_processed.h5ad')
adata.write(filename=processed_h5ad)